"""xwork_webcrawler.py 协程版百度图片搜索并批量下载
"""
import asyncio
import aiohttp
from urllib.parse import quote
import os
import re
import time
url = "https://image.baidu.com/search/flip?tn=baiduimage&word="
keyword = "CG原画"
folder = "img"
path = os.path.abspath(".")
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) \
            Gecko/20100101 Firefox/61.0'
}


async def fetch(session, url):
    """GET *url* with *session* and return the response body as text."""
    async with session.get(url, timeout=20) as response:
        body = await response.text()
        return body


async def store(session, url):
    """Download *url* via *session* and save it under ``path/folder``.

    The file name is the last path segment of the URL; the body is
    streamed to disk chunk by chunk so large images never sit fully
    in memory. Prints the elapsed time for the download.
    """
    t1 = time.perf_counter()
    # Last URL segment as file name; fall back so a URL ending in "/"
    # does not produce an empty name (open("") would raise).
    name = url.split("/")[-1] or "unnamed"
    async with session.get(url, timeout=20) as res:
        with open(os.path.join(path, folder, name), "wb") as f:
            # 64 KiB chunks: the original 512-byte reads meant thousands
            # of tiny read/write round-trips per image.
            while True:
                chunk = await res.content.read(65536)
                if not chunk:
                    break
                f.write(chunk)
    print(f"保存图片{url}耗时{time.perf_counter() - t1}秒。")


async def main():
    """Search Baidu images for ``keyword`` and download all hits concurrently."""
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(folder, exist_ok=True)
    async with aiohttp.ClientSession(headers=headers) as session:
        t1 = time.perf_counter()
        html = await fetch(session, url + quote(keyword))
        links = re.findall(r'"objURL":"(.+?)"', html)
        print(f"提取文本耗时{time.perf_counter() - t1}秒。")
        # BUG FIX: the original did `await asyncio.create_task(...)` inside
        # the loop, awaiting each task as soon as it was created — that
        # serializes the downloads and defeats the coroutine design.
        # gather() runs them all concurrently; return_exceptions keeps one
        # failed image from aborting the whole batch.
        results = await asyncio.gather(
            *(store(session, link) for link in links),
            return_exceptions=True,
        )
        for link, result in zip(links, results):
            if isinstance(result, Exception):
                print(f"下载失败{link}:{result!r}")


if __name__ == "__main__":
    t1 = time.perf_counter()
    try:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(main())
    except Exception as e:
        print(repr(e))
    print(f"主程序耗时{time.perf_counter() - t1}秒。")
